* Clean f1095A records and build the per-record variables used downstream.
* The first positional argument of this do-file supplies yr, which selects the
* input folder here and is reused further down for soiYr and output file names.
local yr = `1'
use "$dataDir`yr'/f1095a.dta", clear

* Monthly amounts are calculated for all 1095As, including duplicates.
gen issn = wc033
gen numMoPrem_f1095A = 0
gen numMoSLCSP_f1095A = 0
gen numMoAPTC_f1095A = 0

* For month i the premium, SLCSP and APTC amounts are read from
* ew(900+i), ew(920+i) and ew(940+i) respectively.
forvalues i = 1/12 {
	local p = 900 + `i'
	local s = `p' + 20
	local a = `p' + 40
	
	gen moPrem_f1095A_`i' = ew`p'
	gen moSLCSP_f1095A_`i' = ew`s'
	gen moAPTC_f1095A_`i' = ew`a'
	
	* Count the month only when the amount is a real positive number.
	* Stata orders missing values above every number, so a bare "> 0" test
	* would also count months whose amount is missing.
	replace numMoPrem_f1095A = numMoPrem_f1095A + 1 if moPrem_f1095A_`i' > 0 & !missing(moPrem_f1095A_`i')
	replace numMoSLCSP_f1095A = numMoSLCSP_f1095A + 1 if moSLCSP_f1095A_`i' > 0 & !missing(moSLCSP_f1095A_`i')
	replace numMoAPTC_f1095A = numMoAPTC_f1095A + 1 if moAPTC_f1095A_`i' > 0 & !missing(moAPTC_f1095A_`i')
}

* Sum the twelve monthly amounts for each of the three measures.
* rowtotal() treats missing months as zero; the replace is a safety net.
foreach k in Prem SLCSP APTC {
	egen mo`k'_t_f1095A = rowtotal(mo`k'_f1095A_*)
	replace mo`k'_t_f1095A = 0 if mo`k'_t_f1095A == .
}

* Reported annual amounts come from ew913 (premium), ew933 (SLCSP) and
* ew953 (APTC). Missing is treated as zero, and the annual figure is then
* raised to the monthly total whenever the monthly total is larger.
local code = 913
foreach k in Prem SLCSP APTC {
	gen ann`k'_f1095A = ew`code'
	replace ann`k'_f1095A = 0 if ann`k'_f1095A == .
	replace ann`k'_f1095A = max(ann`k'_f1095A, mo`k'_t_f1095A)
	local code = `code' + 20
}

* Annuals approx equal rowtotal of monthlies (within rounding error), so scale
* the reported annual amount up to a 12-month equivalent.
* These use the raw ew fields, so they stay missing when the field is missing
* or when numMoPrem_f1095A is zero.
* NOTE(review): all three measures are scaled by the premium month count
* (numMoPrem_f1095A), not by their own month counts - confirm this is intended.
local code = 913
foreach k in Prem SLCSP APTC {
	gen ann`k'_fte_f1095A = ew`code' * (12 / numMoPrem_f1095A)
	local code = `code' + 20
}

* Records with accept == 0 are not used (records with missing accept are kept).
keep if accept != 0

* Nonfiler flag: 1 when recid is above 900000, 0 otherwise.
* A missing recid also compares as above 900000 and is flagged 1.
gen inonfiler = recid > 900000

* When a group (flpdyr, recid, policy_num, issn) contains at least one record
* with wc041 == "G" (amended), drop that group's wc041 == "F" (original) records.
tempvar isAmended nAmended
gen `isAmended' = (wc041 == "G")
bysort flpdyr recid policy_num issn: egen `nAmended' = total(`isAmended')
drop if `nAmended' > 0 & wc041 == "F"
drop `nAmended' `isAmended'

* famSize_f1095A counts every covered person: taxpayer, spouse, and up to
* seven dependents. numDep_f1095A counts the same people except the taxpayer,
* so a covered spouse is included in numDep_f1095A.
gen famSize_f1095A = (tp_cov_ind == "Y") + (sp_cov_ind == "Y")
gen numDep_f1095A = (sp_cov_ind == "Y")
forvalues i = 1/7 {
	local covered (dep`i'_cov_ind == "Y")
	replace famSize_f1095A = famSize_f1095A + `covered'
	replace numDep_f1095A = numDep_f1095A + `covered'
}

* The if command (unlike the if qualifier) evaluates a variable on the first
* observation only, so the whole file is treated according to flpdyr in row 1.
* NOTE(review): this assumes flpdyr is constant within the file - confirm.
if (flpdyr[1] < 2017) {
	gen f1095_ssnind = 1
}

* fte is covered persons times premium months, divided by 12; it is left
* missing unless f1095_ssnind == 1.
gen fte_f1095A = (famSize_f1095A * numMoPrem_f1095A) / 12 if f1095_ssnind == 1

* Not every year is guaranteed to carry mars; create it (as 1) only when absent.
capture confirm variable mars, exact
if (_rc != 0) {
	gen mars = 1
}

* Missing mars is assumed to be 1 (single).
replace mars = 1 if mars == .
* A covered spouse on the 1095A means 2 (married).
replace mars = 2 if sp_cov_ind == "Y"
* Still 1 but with a nonzero numDep_f1095A: assume 4 (head of household).
replace mars = 4 if mars == 1 & numDep_f1095A != 0

* Give every record of a recid the largest mars code found for that recid.
tempvar topMars
bysort recid: egen `topMars' = max(mars)
replace mars = `topMars'
drop `topMars'

* Keep one record per (flpdyr, recid, policy_num).
* The negated copies make the ascending bysort order put the record with the
* largest family size, then the most premium months, first. Previously these
* keys were created but the sort used the un-negated variables, which kept the
* smallest record instead.
* NOTE(review): this changes which duplicate survives - confirm that keeping
* the largest record is the intended rule.
gen tempNumCovIndiv = -famSize_f1095A
gen tempnumMoPrem_f1095A = -numMoPrem_f1095A

* The if command evaluates flpdyr on the first observation only.
* From 2017 on, f1095_ssnind is the leading tie-breaker (lowest value first).
if (flpdyr[1] < 2017) {
	bysort flpdyr recid policy_num (tempNumCovIndiv tempnumMoPrem_f1095A): gen nval = _n
}
else {
	bysort flpdyr recid policy_num (f1095_ssnind tempNumCovIndiv tempnumMoPrem_f1095A): gen nval = _n
}
drop if nval > 1
* Removes every variable whose name starts with temp, including the two keys.
drop temp*
	
* Indicator for the taxpayer being covered.
gen TPCovInd = (tp_cov_ind == "Y")
* Two characters of exchange_id starting at position 4, plus a numeric encoding.
gen stateExchID = substr(exchange_id, 4, 2)
encode stateExchID, gen(state_f1095A)
gen irmf = 1
* yr is appended to the literal 20 to form the year value.
gen soiYr = 20`yr'

* uid_irmf is the digits of recid followed by the digits of flpdyr.
* An explicit integer format is required: string() defaults to %9.0g, which
* switches to scientific notation for values with eight or more digits and
* would corrupt the concatenated id.
gen temp = string(recid, "%15.0f") + string(flpdyr, "%15.0f")
destring temp, gen(uid_irmf)
drop temp

* Flag records whose wc033, written as text, begins with "12345678".
tostring wc033, gen(str_ssn)
gen temp_invalid = (strpos(str_ssn, "12345678") == 1)

* Flagged records get a new recid of 10000000 plus their position within
* their current recid group.
* NOTE(review): flagged rows from different recid groups that share the same
* within-group position receive the same new recid - confirm this is intended.
bysort recid: gen temp = _n
replace recid = 10000000 + temp if temp_invalid
drop str_ssn temp*

* Keep only f1095_ssnind of 1 or less; missing values are dropped as well.
keep if f1095_ssnind <= 1

* Write the record-level file before collapsing. save leaves the data in
* memory untouched, so no preserve and restore pair is needed around it.
save "$dataDir/f1095A_no_collapse_`yr'.dta", replace

* Number of remaining records for each recid.
bysort recid: gen numPols = _N

* Collapse to one row per (flpdyr, recid).
* ssnind is not created in this file and must already exist in the data.
collapse ///
	(sum) annPrem_* annAPTC_* fte_f1095A ///
	(max) annSLCSP_* famSize_f1095A mars ssnind ///
	(min) f1095_ssnind ///
	(firstnm) issn wc033 stateExchID irmf soiYr uid_irmf numPols, ///
	by(flpdyr recid) fast

save "$dataDir/f1095A_`yr'.dta", replace
